# -*- coding: utf-8 -*-
"""
Created on Wed Dec  2 14:28:15 2020

@author: Da Hui
"""

import time
import csv
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def run(url, name):
    """Search each programme name from the page at *url* and record its info box.

    Opens a Chrome session, loads *url*, then for every entry of *name*
    types it into the page's search box, submits the search, waits for the
    result page, parses the first ``.basic-info`` element with ``print_bd``
    and appends the result to the output CSV with ``print2CSV``.

    Entries whose result page never appears (wait timeout) or that have no
    ``.basic-info`` element are skipped so that one miss does not abort the
    whole batch.

    Args:
        url: Address of a page that exposes the ``query`` search box and
            the ``search`` button.
        name: Iterable of search terms (programme names).
    """
    # Local import keeps this fix self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import TimeoutException

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        for i in name:
            try:
                # The controls are looked up again on every pass (presumably
                # because each search loads a fresh page).
                # find_element(By.ID, ...) replaces the find_element_by_id
                # helpers, which were removed in Selenium 4.
                searchbox = driver.find_element(By.ID, 'query')
                searchabtn = driver.find_element(By.ID, 'search')

                # Type the programme name into the search box.
                searchbox.clear()
                searchbox.send_keys(i)

                time.sleep(1)
                searchabtn.click()

                # until() either returns the located elements or raises
                # TimeoutException, so no truthiness check is needed.
                WebDriverWait(driver, 20).until(
                    EC.presence_of_all_elements_located(
                        (By.CLASS_NAME, 'cmn-clearfix')
                    )
                )
            except TimeoutException:
                print("timeout, skipped:", i)
                continue

            driver.execute_script("window.scrollTo(0, 1000);")
            time.sleep(2)

            soup = BeautifulSoup(driver.page_source, 'lxml')
            info_blocks = soup.select('.basic-info')
            if not info_blocks:
                # No info box on this page: nothing to record for this name.
                continue

            result = print_bd(info_blocks[0])
            print2CSV(i, result)

        print("over")
    finally:
        # Always end the browser session, even when an error escapes the loop.
        driver.quit()
    
    

def print_bd(info):
    """Convert an info-box element into a ``label -> list of values`` dict.

    The ``dt`` descendants of *info* are read as labels and the ``dd``
    descendants as their values; the two lists are paired by position.

    Args:
        info: Parsed element exposing ``find_all`` (e.g. a BeautifulSoup tag).

    Returns:
        A dict mapping each label (non-breaking spaces removed) to the list
        of whitespace-stripped text fragments of its value. Raw fragments
        shorter than two characters and fragments that are blank after
        stripping are dropped. Returns ``None`` when there are fewer ``dd``
        elements than ``dt`` elements, since the pairing would be unreliable.
    """
    keys = info.find_all('dt')
    values = info.find_all('dd')

    if len(values) < len(keys):
        return None

    result = {}
    for key, value in zip(keys, values):
        # Filter into a new list instead of removing from the list being
        # iterated, which skipped the element after every removal.
        stripped = (s.strip() for s in value.strings if len(s) >= 2)
        fragments = [s for s in stripped if s]

        # .string is None when the label holds nested markup; fall back to
        # the concatenated text in that case.
        label = key.string
        if label is None:
            label = key.get_text()

        result[label.replace('\xa0', '')] = fragments

    print("ok")
    return result


def print2CSV(name, datas):
    """Append one ``[name, datas]`` row to ``baiduInfo.csv``.

    Args:
        name: Value for the first column (the search term).
        datas: Value for the second column; written using the csv module's
            default string conversion.
    """
    # The context manager flushes and closes the handle on every call
    # instead of leaving it to the garbage collector.
    with open('baiduInfo.csv', 'a+', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        writer.writerow([name, datas])



if __name__ == '__main__':
    # Load the programme names from the input table.
    with open('JM.csv', encoding='utf-8-sig') as file:
        # low_memory=False suppresses the pandas warning.
        csv_data = pd.read_csv(file, low_memory=False)
    name = csv_data['Jmname']  # column holding the programme names
    url = 'https://baike.baidu.com/item/%E5%B9%B8%E7%A6%8F%E7%BB%BD%E6%94%BE/7986154'

    # Write the header row and close the file before run() starts appending.
    # Left open, the buffered header is only flushed at interpreter exit, at
    # offset 0, where it can overwrite the start of the appended rows.
    with open('baiduInfo.csv', 'w+', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        writer.writerow(['name', 'info'])

    run(url, name)